home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
a_utils
/
yacc
/
flexyacc
/
aflex.lha
/
aflex
/
src
/
ascan.l
< prev
next >
Wrap
Text File
|
1992-12-29
|
13KB
|
526 lines
-- Copyright (c) 1990 Regents of the University of California.
-- All rights reserved.
--
-- This software was developed by John Self of the Arcadia project
-- at the University of California, Irvine.
--
-- Redistribution and use in source and binary forms are permitted
-- provided that the above copyright notice and this paragraph are
-- duplicated in all such forms and that any documentation,
-- advertising materials, and other materials related to such
-- distribution and use acknowledge that the software was developed
-- by the University of California, Irvine. The name of the
-- University may not be used to endorse or promote products derived
-- from this software without specific prior written permission.
-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
-- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
-- WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-- TITLE scanner specification file
-- AUTHOR: John Self (UCI)
-- DESCRIPTION regular expressions and actions matching tokens
-- that aflex expects to find in its input.
-- NOTES input to aflex (NOT alex.) It uses exclusive start conditions
-- and case insensitive scanner generation available only in aflex
-- (or flex if you use C.)
-- generate scanner using the command 'aflex -is ascan.l'
-- $Header: /dc/uc/self/arcadia/aflex/ada/src/RCS/ascan.l,v 1.19 1991/12/03 23:08:24 self Exp self $
%x SECT2 SECT2PROLOG SECT3 PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER BRACEERROR
%x ACTION_STRING
WS [ \t\f]+
OPTWS [ \t\f]*
NOT_WS [^ \t\f\n]
NAME [a-z_][a-z_0-9-]*
NOT_NAME [^a-z_\n]+
SCNAME {NAME}
ESCSEQ \\([^\n]|[0-9]{1,3})
%%
^{WS} { indented_code := true; }
^#.*\n { linenum := linenum + 1; ECHO;
-- treat as a comment;
}
^{OPTWS}"--".*\n { linenum := linenum + 1; ECHO; }
^"%s"(tart)? { return ( SCDECL ); }
^"%x" { return ( XSCDECL ); }
{WS} { return ( WHITESPACE ); }
^"%%".* {
sectnum := 2;
misc.line_directive_out;
ENTER(SECT2PROLOG);
return ( SECTEND );
}
^"%"[^%sx]" ".*\n {
text_io.put( Standard_Error, "old-style lex command at line " );
int_io.put( Standard_Error, linenum );
text_io.put( Standard_Error, "ignored:" );
text_io.new_line( Standard_Error );
text_io.put( Standard_Error, ASCII.HT );
text_io.put( Standard_Error, yytext(1..YYLength) );
linenum := linenum + 1;
}
^{NAME} {
nmstr := vstr(yytext(1..YYLength));
didadef := false;
ENTER(PICKUPDEF);
}
{SCNAME} { nmstr := vstr(yytext(1..YYLength));
return NAME;
}
^{OPTWS}\n { linenum := linenum + 1;
-- allows blank lines in section 1;
}
{OPTWS}\n { linenum := linenum + 1; return Newline; }
. { misc.synerr( "illegal character" );ENTER(RECOVER);}
<PICKUPDEF>{WS} { null;
-- separates name and definition;
}
<PICKUPDEF>{NOT_WS}.* {
nmdef := vstr(yytext(1..YYLength));
i := tstring.len( nmdef );
while ( i >= tstring.first ) loop
if ( (CHAR(nmdef,i) /= ' ') and
(CHAR(nmdef,i) /= ASCII.HT) ) then
exit;
end if;
i := i - 1;
end loop;
sym.ndinstal( nmstr,
tstring.slice(nmdef, tstring.first, i) );
didadef := true;
}
<PICKUPDEF>\n {
if ( not didadef ) then
misc.synerr( "incomplete name definition" );
end if;
ENTER(0);
linenum := linenum + 1;
}
<RECOVER>.*\n { linenum := linenum + 1;
ENTER(0);
nmstr := vstr(yytext(1..YYLength));
return NAME;
}
<SECT2PROLOG>.*\n/{NOT_WS} {
linenum := linenum + 1;
ACTION_ECHO;
MARK_END_OF_PROLOG;
ENTER(SECT2);
}
<SECT2PROLOG>.*\n { linenum := linenum + 1; ACTION_ECHO; }
<SECT2PROLOG><<EOF>> { MARK_END_OF_PROLOG;
return End_Of_Input;
}
<SECT2>^{OPTWS}\n { linenum := linenum + 1;
-- allow blank lines in sect2;}
-- this rule matches indented lines which
-- are not comments.
<SECT2>^{WS}{NOT_WS}"--".*\n {
misc.synerr("indented code found outside of action");
linenum := linenum + 1;
}
<SECT2>"<" { ENTER(SC); return ( '<' ); }
<SECT2>^"^" { return ( '^' ); }
<SECT2>\" { ENTER(QUOTE); return ( '"' ); }
<SECT2>"{"/[0-9] { ENTER(NUM); return ( '{' ); }
<SECT2>"{"[^0-9\n][^}\n]* { ENTER(BRACEERROR); }
<SECT2>"$"/[ \t\n] { return ( '$' ); }
<SECT2>{WS}"|".*\n { continued_action := true;
linenum := linenum + 1;
return Newline;
}
<SECT2>^{OPTWS}"--".*\n { linenum := linenum + 1; ACTION_ECHO; }
<SECT2>{WS} {
-- this rule is separate from the one below because
-- otherwise we get variable trailing context, so
-- we can't build the scanner using -{f,F}
bracelevel := 0;
continued_action := false;
ENTER(ACTION);
return Newline;
}
<SECT2>{OPTWS}/\n {
bracelevel := 0;
continued_action := false;
ENTER(ACTION);
return Newline;
}
<SECT2>^{OPTWS}\n { linenum := linenum + 1; return Newline; }
<SECT2>"<<EOF>>" { return ( EOF_OP ); }
<SECT2>^"%%".* {
sectnum := 3;
ENTER(SECT3);
return ( End_Of_Input );
-- to stop the parser
}
<SECT2>"["([^\\\]\n]|{ESCSEQ})+"]" {
nmstr := vstr(yytext(1..YYLength));
-- check to see if we've already encountered this ccl
cclval := sym.ccllookup( nmstr );
if ( cclval /= 0 ) then
yylval := cclval;
cclreuse := cclreuse + 1;
return ( PREVCCL );
else
-- we fudge a bit. We know that this ccl will
-- soon be numbered as lastccl + 1 by cclinit
sym.cclinstal( nmstr, lastccl + 1 );
-- push back everything but the leading bracket
-- so the ccl can be rescanned
PUT_BACK_STRING(nmstr, 1);
ENTER(FIRSTCCL);
return ( '[' );
end if;
}
<SECT2>"{"{NAME}"}" {
nmstr := vstr(yytext(1..YYLength));
-- chop leading and trailing brace
tmpbuf := slice(vstr(yytext(1..YYLength)),
2, YYLength-1);
nmdefptr := sym.ndlookup( tmpbuf );
if ( nmdefptr = NUL ) then
misc.synerr( "undefined {name}" );
else
-- push back name surrounded by ()'s
unput(')');
PUT_BACK_STRING(nmdefptr, 0);
unput('(');
end if;
}
<SECT2>[/|*+?.()] { tmpbuf := vstr(yytext(1..YYLength));
case tstring.CHAR(tmpbuf,1) is
when '/' => return '/';
when '|' => return '|';
when '*' => return '*';
when '+' => return '+';
when '?' => return '?';
when '.' => return '.';
when '(' => return '(';
when ')' => return ')';
when others =>
misc.aflexerror("error in aflex case");
end case;
}
<SECT2>. { tmpbuf := vstr(yytext(1..YYLength));
yylval := CHARACTER'POS(CHAR(tmpbuf,1));
return CHAR;
}
<SECT2>\n { linenum := linenum + 1; return Newline; }
<SC>"," { return ( ',' ); }
<SC>">" { ENTER(SECT2); return ( '>' ); }
<SC>">"/"^" { ENTER(CARETISBOL); return ( '>' ); }
<SC>{SCNAME} { nmstr := vstr(yytext(1..YYLength));
return NAME;
}
<SC>. { misc.synerr( "bad start condition name" ); }
<CARETISBOL>"^" { ENTER(SECT2); return ( '^' ); }
<QUOTE>[^"\n] { tmpbuf := vstr(yytext(1..YYLength));
yylval := CHARACTER'POS(CHAR(tmpbuf,1));
return CHAR;
}
<QUOTE>\" { ENTER(SECT2); return ( '"' ); }
<QUOTE>\n {
misc.synerr( "missing quote" );
ENTER(SECT2);
linenum := linenum + 1;
return ( '"' );
}
<FIRSTCCL>"^"/[^-\n] { ENTER(CCL); return ( '^' ); }
<FIRSTCCL>"^"/- { return ( '^' ); }
<FIRSTCCL>- { ENTER(CCL); yylval := CHARACTER'POS('-'); return ( CHAR ); }
<FIRSTCCL>. { ENTER(CCL);
tmpbuf := vstr(yytext(1..YYLength));
yylval := CHARACTER'POS(CHAR(tmpbuf,1));
return CHAR;
}
<CCL>-/[^\]\n] { return ( '-' ); }
<CCL>[^\]\n] { tmpbuf := vstr(yytext(1..YYLength));
yylval := CHARACTER'POS(CHAR(tmpbuf,1));
return CHAR;
}
<CCL>"]" { ENTER(SECT2); return ( ']' ); }
<NUM>[0-9]+ {
yylval := misc.myctoi( vstr(yytext(1..YYLength)) );
return ( NUMBER );
}
<NUM>"," { return ( ',' ); }
<NUM>"}" { ENTER(SECT2); return ( '}' ); }
<NUM>. {
misc.synerr( "bad character inside {}'s" );
ENTER(SECT2);
return ( '}' );
}
<NUM>\n {
misc.synerr( "missing }" );
ENTER(SECT2);
linenum := linenum + 1;
return ( '}' );
}
<BRACEERROR>"}" { misc.synerr( "bad name in {}'s" ); ENTER(SECT2); }
<BRACEERROR>\n { misc.synerr( "missing }" );
linenum := linenum + 1;
ENTER(SECT2);
}
<ACTION>"{" { bracelevel := bracelevel + 1; }
<ACTION>"}" { bracelevel := bracelevel - 1; }
<ACTION>[^a-z_{}"'/\n]+ { ACTION_ECHO; }
<ACTION>{NAME} { ACTION_ECHO; }
<ACTION>"--".*\n { linenum := linenum + 1; ACTION_ECHO; }
<ACTION>"'"([^'\\\n]|\\.)*"'" { ACTION_ECHO;
-- character constant;
}
<ACTION>\" { ACTION_ECHO; ENTER(ACTION_STRING); }
<ACTION>\n {
linenum := linenum + 1;
ACTION_ECHO;
if ( bracelevel = 0 ) then
text_io.new_line ( temp_action_file );
ENTER(SECT2);
end if;
}
<ACTION>. { ACTION_ECHO; }
<ACTION_STRING>[^"\\\n]+ { ACTION_ECHO; }
<ACTION_STRING>\\. { ACTION_ECHO; }
<ACTION_STRING>\n { linenum := linenum + 1; ACTION_ECHO; }
<ACTION_STRING>\" { ACTION_ECHO; ENTER(ACTION); }
<ACTION_STRING>. { ACTION_ECHO; }
<SECT2,QUOTE,CCL>{ESCSEQ} {
yylval := CHARACTER'POS(misc.myesc( vstr(yytext(1..YYLength)) ));
return ( CHAR );
}
<FIRSTCCL>{ESCSEQ} {
yylval := CHARACTER'POS(misc.myesc( vstr(yytext(1..YYLength)) ));
ENTER(CCL);
return ( CHAR );
}
<SECT3>.*(\n?) { if ( check_yylex_here ) then
return End_Of_Input;
else
ECHO;
end if;
}
%%
with misc_defs, misc, sym, parse_tokens, int_io;
with tstring, ascan_dfa, ascan_io, external_file_manager;
use misc_defs, parse_tokens, tstring;
use ascan_dfa, ascan_io, external_file_manager;
package scanner is
call_yylex : boolean := false;
function get_token return Token;
end scanner;
package body scanner is
beglin : boolean := false;
i, bracelevel: integer;
function get_token return Token is
toktype : Token;
didadef, indented_code : boolean;
cclval : integer;
nmdefptr : vstring;
nmdef, tmpbuf : vstring;
procedure ACTION_ECHO is
begin
text_io.put( temp_action_file, yytext(1..YYLength) );
end ACTION_ECHO;
procedure MARK_END_OF_PROLOG is
begin
text_io.put( temp_action_file, "%%%% end of prolog" );
text_io.new_line( temp_action_file );
end MARK_END_OF_PROLOG;
procedure PUT_BACK_STRING(str : vstring; start : integer) is
begin
for i in reverse start+1..tstring.len(str) loop
unput( CHAR(str,i) );
end loop;
end PUT_BACK_STRING;
function check_yylex_here return boolean is
begin
return ( (yytext'length >= 2) and then
((yytext(1) = '#') and (yytext(2) = '#')));
end check_yylex_here;
##
begin
if (call_yylex) then
toktype := YYLex;
call_yylex := false;
return toktype;
end if;
if ( eofseen ) then
toktype := End_Of_Input;
else
toktype := YYLex;
end if;
-- this tracing code allows easy tracing of aflex runs
if (trace) then
text_io.new_line(Standard_Error);
text_io.put(Standard_Error, "toktype = :" );
text_io.put(Standard_Error, Token'image(toktype));
text_io.put_line(Standard_Error, ":" );
end if;
if ( toktype = End_Of_Input ) then
eofseen := true;
if ( sectnum = 1 ) then
misc.synerr( "unexpected EOF" );
sectnum := 2;
toktype := SECTEND;
else
if ( sectnum = 2 ) then
sectnum := 3;
toktype := SECTEND;
end if;
end if;
end if;
if ( trace ) then
if ( beglin ) then
int_io.put( Standard_Error, num_rules + 1 );
text_io.put( Standard_Error, ASCII.HT );
beglin := false;
end if;
case toktype is
when '<' | '>'|'^'|'$'|'"'|'['|']'|'{'|'}'|'|'|'('|
')'|'-'|'/'|'?'|'.'|'*'|'+'|',' =>
text_io.put( Standard_Error, Token'image(toktype) );
when NEWLINE =>
text_io.new_line(Standard_Error);
if ( sectnum = 2 ) then
beglin := true;
end if;
when SCDECL =>
text_io.put( Standard_Error, "%s" );
when XSCDECL =>
text_io.put( Standard_Error, "%x" );
when WHITESPACE =>
text_io.put( Standard_Error, " " );
when SECTEND =>
text_io.put_line( Standard_Error, "%%" );
-- we set beglin to be true so we'll start
-- writing out numbers as we echo rules. aflexscan() has
-- already assigned sectnum
if ( sectnum = 2 ) then
beglin := true;
end if;
when NAME =>
text_io.put( Standard_Error, ''' );
text_io.put( Standard_Error, YYText);
text_io.put( Standard_Error, ''' );
when CHAR =>
if ( (yylval < CHARACTER'POS(' ')) or
(yylval = CHARACTER'POS(ASCII.DEL)) ) then
text_io.put( Standard_Error, '\' );
int_io.put( Standard_Error, yylval );
text_io.put( Standard_Error, '\' );
else
text_io.put( Standard_Error, Token'image(toktype) );
end if;
when NUMBER =>
int_io.put( Standard_Error, yylval );
when PREVCCL =>
text_io.put( Standard_Error, '[' );
int_io.put( Standard_Error, yylval );
text_io.put( Standard_Error, ']' );
when End_Of_Input =>
text_io.put( Standard_Error, "End Marker" );
when others =>
text_io.put( Standard_Error, "Something weird:" );
text_io.put_line( Standard_Error, Token'image(toktype));
end case;
end if;
return toktype;
end get_token;
end scanner;